Load Packages

library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.1.3
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages ---------------------------------------------------------------------------------------- tidyverse 1.3.1 --
√ ggplot2 3.3.5     √ purrr   0.3.4
√ tibble  3.1.6     √ dplyr   1.0.8
√ tidyr   1.2.0     √ stringr 1.4.0
√ readr   2.1.2     √ forcats 0.5.1
Warning: package ‘ggplot2’ was built under R version 4.1.3
Warning: package ‘tibble’ was built under R version 4.1.3
Warning: package ‘tidyr’ was built under R version 4.1.3
Warning: package ‘readr’ was built under R version 4.1.3
Warning: package ‘purrr’ was built under R version 4.1.3
Warning: package ‘dplyr’ was built under R version 4.1.3
Warning: package ‘stringr’ was built under R version 4.1.3
Warning: package ‘forcats’ was built under R version 4.1.3
-- Conflicts ------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(ggthemes)
Warning: package ‘ggthemes’ was built under R version 4.1.3
library(scales)
Warning: package ‘scales’ was built under R version 4.1.3

Attaching package: ‘scales’

The following object is masked from ‘package:purrr’:

    discard

The following object is masked from ‘package:readr’:

    col_factor
library(readxl)
Warning: package ‘readxl’ was built under R version 4.1.3
library(plotly)
Warning: package ‘plotly’ was built under R version 4.1.3
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout

Making Dataframes

color_a <- c("#58b5e1","#1c5b5a","#46ebdc","#1f4196","#e28de2","#818bd7","#e4ccf1","#82185f","#f849b6","#000000","#5e34bc","#b7d165","#30d52e","#ff5357")
color_na <- c("#1c5b5a","#46ebdc","#e28de2","#818bd7","#e4ccf1","#82185f","#f849b6","#000000","#5e34bc","#30d52e","#ff5357")
counties <- c('Anson', 'Cabarrus', 'Catawba', 'Chester', 'Cleveland', 'Gaston', 'Iredell', 'Lancaster', 'Lincoln', 'Mecklenburg', 'Rowan', 'Stanly', 'Union', 'York')
attainment_lvl <- c('Highest Degree: Less than a High School Diploma', 'Highest Degree: High School Diploma', 'Highest Degree: Some College, No Degree', "Highest Degree: Associate's Degree", "Highest Degree: Bachelor's Degree", "Highest Degree: Graduate or Professional Degree")
foreign_detail <- c('Foreign-Born: Africa', 'Foreign-Born: Asia', 'Foreign-Born: Europe', 'Foreign-Born: Latin America', 'Place of Birth Total')

countypop <- rbind(read_csv("cc-est2019-agesex-37.csv", show_col_types = F),
                   read_csv("cc-est2019-agesex-45.csv", show_col_types = F)) %>%
  select(-SUMLEV, -STATE, -COUNTY) %>%
  mutate(CTYNAME = gsub(' County', '', CTYNAME),
         YEAR = as.integer(YEAR + 2007)) %>%
  filter(CTYNAME %in% counties, YEAR >= 2010,
         !(STNAME == 'South Carolina' & CTYNAME == 'Union')) %>%
  distinct()
# Year 3 is 2010, Year 12 is 2019

# Making Charlotte Region
cr <- countypop[1:10,] %>%
  mutate(CTYNAME = 'Charlotte Region')
for(i in 4:length(colnames(countypop))) {
  for(j in 1:10){
    cr[j,i] <- sum((countypop %>% filter(YEAR == j+2009))[i])
  }
}

# Making Age & Gender data frame
pop_age_gender <- rbind(countypop, cr)
countypop <- cr %>% transmute(YEAR = YEAR, CHARLOTTEPOP = POPESTIMATE) %>% right_join(countypop, by = 'YEAR') %>% mutate(PROPORTION = POPESTIMATE / CHARLOTTEPOP) %>%
  group_by(CTYNAME) %>%
  mutate(CHANGE = ifelse(YEAR == 2010, 0, POPESTIMATE/lag(POPESTIMATE, default = first(YEAR)) - 1)) %>%
  ungroup()

pop_age_gender <- pop_age_gender %>%
  select(-contains('_TOT'), -POPEST_FEM, -POPEST_MALE, -AGE16PLUS_MALE, -AGE16PLUS_FEM, -AGE18PLUS_FEM, -AGE18PLUS_MALE, -UNDER5_FEM, -UNDER5_MALE, -AGE1544_FEM, -AGE1544_MALE, -MEDIAN_AGE_FEM, -MEDIAN_AGE_MALE, -AGE65PLUS_FEM,-AGE65PLUS_MALE, -AGE513_FEM, -AGE513_MALE, -AGE4564_FEM, -AGE4564_MALE, -AGE2544_FEM, -AGE2544_MALE, -AGE1824_FEM, -AGE1824_MALE, -AGE1417_FEM, -AGE1417_MALE) %>%
  rename(AGE004_FEM = AGE04_FEM, AGE004_MALE = AGE04_MALE, AGE0509_MALE = AGE59_MALE, AGE0509_FEM = AGE59_FEM)
pop_age_gender <- pop_age_gender %>%
  pivot_longer(cols = colnames(pop_age_gender[,5:40]), names_to = 'DEMO', values_to = 'POP') %>%
  mutate(PERCENTAGE = POP/POPESTIMATE)
pop_age_gender <- pop_age_gender %>%
  mutate(GENDER = as.factor(ifelse(grepl('MALE', pop_age_gender$DEMO),'MALE','FEMALE')),
         DEMO = gsub('_FEM','', DEMO),
         DEMO = gsub('_MALE','', DEMO),
         DEMO = case_when(DEMO == 'AGE004' ~ '0-04',
                          DEMO == 'AGE0509' ~ '05-09',
                          DEMO == 'AGE1014' ~ '10-14',
                          DEMO == 'AGE1519' ~ '15-19',
                          DEMO == 'AGE2024' ~ '20-24',
                          DEMO == 'AGE2529' ~ '25-29',
                          DEMO == 'AGE3034' ~ '30-34',
                          DEMO == 'AGE3539' ~ '35-39',
                          DEMO == 'AGE4044' ~ '40-44',
                          DEMO == 'AGE4549' ~ '45-49',
                          DEMO == 'AGE5054' ~ '50-54',
                          DEMO == 'AGE5559' ~ '55-59',
                          DEMO == 'AGE6064' ~ '60-64',
                          DEMO == 'AGE6569' ~ '65-69',
                          DEMO == 'AGE7074' ~ '70-74',
                          DEMO == 'AGE7579' ~ '75-79',
                          DEMO == 'AGE8084' ~ '80-84',
                          DEMO == 'AGE85PLUS' ~ '85 and Over'))

# Making ethnicity data frame
ethpop <- rbind(read_csv("cc-est2019-alldata-37.csv", show_col_types = F),
                   read_csv("cc-est2019-alldata-45.csv", show_col_types = F)) %>%
  mutate(CTYNAME = gsub(' County', '', CTYNAME),
         YEAR = as.integer(YEAR + 2007),
         WHITE = NHWA_MALE + NHWA_FEMALE,
         BLACK = NHBA_MALE + NHBA_FEMALE,
         HISPANIC = HWA_MALE + HWA_FEMALE + HBA_MALE + HBA_FEMALE + HIA_MALE + HIA_FEMALE + HAA_MALE + HAA_FEMALE + HNA_MALE + HNA_FEMALE + HIA_MALE + HIA_FEMALE,
         ASIAN = NHAA_MALE + NHAA_FEMALE,
         ISLANDER = NHNA_MALE + NHNA_FEMALE,
         NATIVE = NHIA_MALE + NHIA_FEMALE,
         MULTIRACIAL = TOM_MALE + TOM_FEMALE - HTOM_MALE - HTOM_FEMALE
         ) %>%
  filter(CTYNAME %in% counties, YEAR >= 3, AGEGRP == 0,
         !(STNAME == 'South Carolina' & CTYNAME == 'Union')) %>%
  select(STNAME, CTYNAME, YEAR, TOT_POP, WHITE, BLACK, HISPANIC, ASIAN, ISLANDER, NATIVE, MULTIRACIAL) %>%
  distinct()
ethpop <- ethpop %>%
  pivot_longer(cols = colnames(ethpop[,5:11]), names_to = 'ETHNICITY', values_to = 'POP')

# Making place of birth data frame
birthplace <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Indicator == 'Place of Birth',
         County %in% counties,
         !(Measure %in% foreign_detail)) %>%
  distinct()
birthplace <- birthplace %>% inner_join((birthplace %>% group_by(County, Year) %>% summarise(Total = sum(Numerator_value))), by = c('County', 'Year'))
`summarise()` has grouped output by 'County'. You can override using the `.groups` argument.
# Making the unemployment data frame
unemployment <- rbind(read_excel('ur_anson.xlsx', trim_ws = T) %>% mutate(County = 'Anson', Period = gsub('M', '', Period)),
                      read_excel('ur_cabarrus.xlsx', trim_ws = T) %>% mutate(County = 'Cabarrus', Period = gsub('M', '', Period)),
                      read_excel('ur_catawba.xlsx', trim_ws = T) %>% mutate(County = 'Catawba', Period = gsub('M', '', Period)),
                      read_excel('ur_chester.xlsx', trim_ws=T, skip=11)[1:266,] %>% rename(Value = 'Observation Value') %>% mutate(County = 'Chester', Period = gsub('M','',Period)) %>% select(-Label),
                      read_excel('ur_cleveland.xlsx', trim_ws = T) %>% mutate(County = 'Cleveland', Period = gsub('M', '', Period)),
                      read_excel('ur_gaston.xlsx', trim_ws = T) %>% mutate(County = 'Gaston', Period = gsub('M', '', Period)),
                      read_excel('ur_iredell.xlsx', trim_ws = T) %>% mutate(County = 'Iredell', Period = gsub('M', '', Period)),
                      read_excel('ur_lancaster.xlsx', trim_ws = T) %>% mutate(County = 'Lancaster', Period = gsub('M', '', Period)),
                      read_excel('ur_lincoln.xlsx', trim_ws = T) %>% mutate(County = 'Lincoln', Period = gsub('M', '', Period)),
                      read_excel('ur_mecklenburg.xlsx', trim_ws = T) %>% mutate(County = 'Mecklenburg', Period = gsub('M', '', Period)),
                      read_excel('ur_rowan.xlsx', trim_ws = T) %>% mutate(County = 'Rowan', Period = gsub('M', '', Period)),
                      read_excel('ur_stanly.xlsx', trim_ws=T, skip=11)[1:266,] %>% rename(Value = 'Observation Value') %>% mutate(County = 'Stanly', Period = gsub('M','',Period)) %>% select(-Label),
                      read_excel('ur_union.xlsx', trim_ws = T) %>% mutate(County = 'Union', Period = gsub('M', '', Period)),
                      read_excel('ur_york.xlsx', trim_ws = T) %>% mutate(County = 'York', Period = gsub('M', '', Period))) %>%
  mutate(Year = as.integer(Year),
         Period = as.integer(Period),
         Date = as.Date(paste(Year,'-',Period, '-01', sep = '')),
         Value = Value/100) %>%
  rename(Month = Period,
         Unemployment = Value)

# Make income data frame
income <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Indicator == 'Income & Earnings',
         County %in% counties,
         Measure != 'Household Income: Total') %>%
  distinct()
income <- income %>% inner_join((income %>% group_by(County, Year) %>% summarise(Total = sum(Numerator_value))), by = c('County', 'Year'))
`summarise()` has grouped output by 'County'. You can override using the `.groups` argument.
# Make education attainment data frame
education <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Indicator == 'Educational Attainment',
         County %in% counties,
         Measure %in% attainment_lvl) %>%
  distinct()
education <- education %>% inner_join((education %>% group_by(County, Year) %>% summarise(Total = sum(Numerator_value))), by = c('County', 'Year')) %>%
  mutate(
    Order = as.factor(case_when(
      Measure == 'Highest Degree: Less than a High School Diploma' ~ 1,
      Measure == 'Highest Degree: High School Diploma' ~ 2,
      Measure == 'Highest Degree: Some College, No Degree' ~ 3,
      Measure == "Highest Degree: Associate's Degree" ~ 4,
      Measure == "Highest Degree: Bachelor's Degree" ~ 5,
      Measure == "Highest Degree: Graduate or Professional Degree" ~ 6)),
    Measure = gsub("Highest Degree: ","",Measure))
`summarise()` has grouped output by 'County'. You can override using the `.groups` argument.
# Make health care coverage data frame
coverage <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Indicator == 'Health Care Coverage',
         County %in% counties)
# Make housing age data frame
housing <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County),
         diff = Year-Numerator_value) %>%
  filter(Indicator == 'Housing Stock',
         County %in% counties)
# Make poverty figures data frame
poverty <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Measure == 'Individuals in Poverty',
         Theme == 'Social Well-Being',
         County %in% counties)
# Make transportation means data frame
transportation <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Theme == 'Transportation',
         Measure != 'Commuting Means Total',
         County %in% counties)

Demographics

Population

plot_ly(countypop %>% filter(YEAR == 2019), x = ~POPESTIMATE, y = ~CTYNAME, type = 'bar', color = ~CTYNAME, colors = color_a, orientation = 'h')

plot_ly(countypop, x=~YEAR, y=~CHANGE, color=~CTYNAME, type='scatter', mode='lines', colors=color_a)

Age & Gender

plot_ly(pop_age_gender %>% filter(YEAR == 2017, CTYNAME == 'Charlotte Region'),
        y=~DEMO, x=~PERCENTAGE,
        type='bar', color=~GENDER)

Race & Ethnicity

plot_ly(ethpop %>% filter(YEAR == 2019),
        y=~CTYNAME, x=~POP/TOT_POP, color=~ETHNICITY,
        type='bar') %>%
  layout(barmode = 'stack')

Place of Birth

plot_ly(birthplace %>% filter(Year == 2019),
        y=~County, x=~Numerator_value/Total, color=~Measure,
        type='bar') %>%
  layout(barmode = 'stack')

Economy

Unemployment

plot_ly(unemployment, x=~Date, y=~Unemployment, color=~County, colors=color_a, type='scatter', mode='lines')

plot_ly(unemployment %>% filter(Year==2015, Month==6), x=~Unemployment, y=~County, color=~County, colors=color_a, type='bar')

6/21 Up to this point has been done in the shiny app

Income

#### DFs from Values.csv are missing Anson, Chester, and Stanly Counties
plot_ly(income %>% filter(Year == 2014), y=~County, x=~(Numerator_value/Total), color=~Measure, type='bar') %>%
  layout(barmode='stack')

Education

Educational Attainment

plot_ly(education %>% group_by(Year, Measure, County) %>%
  summarise(Numerator = sum(Numerator_value),
            Denominator = mean(Total)) %>% filter(Year == 2018, Measure == 'High School Diploma'), y=~County, color=~County, colors=color_na, x=~(Numerator/Denominator), type='bar')
`summarise()` has grouped output by 'Year', 'Measure'. You can override using the `.groups` argument.

Health

Health Care Coverage

Housing

Housing Age

housing %>% filter(Year == 2017) %>%
  ggplot(aes(x= County, y = Year-Numerator_value, fill = County)) +
  geom_col() +
  coord_flip()

# axis is giving double the true value
plot_ly(housing %>% filter(Year == 2014), y=~County, x=~diff,
        color=~County, colors=color_na, type='bar')

Social Well-Being

Poverty

plot_ly(poverty %>% filter(Year==2014),
        y=~County, x=~Numerator_value/Denominator_value,
        color=~County, colors=color_na, type='bar')

Transportation

##Commuting Modes

# Way too many missing values to create a substantive visualization.
transportation %>% filter(Year == 2014) %>%
  ggplot(aes(x = County, y = Numerator_value, fill = Measure, position = Measure)) +
  geom_col(position = 'dodge') +
  scale_y_continuous(labels = comma) +
  coord_flip()
---
title: "R Notebook"
output: html_notebook
---

# Load Packages
```{r}
library(tidyverse)
library(ggthemes)
library(scales)
library(readxl)
library(plotly)
```


# Making Dataframes
```{r}
color_a <- c("#58b5e1","#1c5b5a","#46ebdc","#1f4196","#e28de2","#818bd7","#e4ccf1","#82185f","#f849b6","#000000","#5e34bc","#b7d165","#30d52e","#ff5357")
color_na <- c("#1c5b5a","#46ebdc","#e28de2","#818bd7","#e4ccf1","#82185f","#f849b6","#000000","#5e34bc","#30d52e","#ff5357")
counties <- c('Anson', 'Cabarrus', 'Catawba', 'Chester', 'Cleveland', 'Gaston', 'Iredell', 'Lancaster', 'Lincoln', 'Mecklenburg', 'Rowan', 'Stanly', 'Union', 'York')
attainment_lvl <- c('Highest Degree: Less than a High School Diploma', 'Highest Degree: High School Diploma', 'Highest Degree: Some College, No Degree', "Highest Degree: Associate's Degree", "Highest Degree: Bachelor's Degree", "Highest Degree: Graduate or Professional Degree")
foreign_detail <- c('Foreign-Born: Africa', 'Foreign-Born: Asia', 'Foreign-Born: Europe', 'Foreign-Born: Latin America', 'Place of Birth Total')

countypop <- rbind(read_csv("cc-est2019-agesex-37.csv", show_col_types = F),
                   read_csv("cc-est2019-agesex-45.csv", show_col_types = F)) %>%
  select(-SUMLEV, -STATE, -COUNTY) %>%
  mutate(CTYNAME = gsub(' County', '', CTYNAME),
         YEAR = as.integer(YEAR + 2007)) %>%
  filter(CTYNAME %in% counties, YEAR >= 2010,
         !(STNAME == 'South Carolina' & CTYNAME == 'Union')) %>%
  distinct()
# Year 3 is 2010, Year 12 is 2019

# Making Charlotte Region
cr <- countypop[1:10,] %>%
  mutate(CTYNAME = 'Charlotte Region')
for(i in 4:length(colnames(countypop))) {
  for(j in 1:10){
    cr[j,i] <- sum((countypop %>% filter(YEAR == j+2009))[i])
  }
}

# Making Age & Gender data frame
pop_age_gender <- rbind(countypop, cr)
countypop <- cr %>% transmute(YEAR = YEAR, CHARLOTTEPOP = POPESTIMATE) %>% right_join(countypop, by = 'YEAR') %>% mutate(PROPORTION = POPESTIMATE / CHARLOTTEPOP) %>%
  group_by(CTYNAME) %>%
  mutate(CHANGE = ifelse(YEAR == 2010, 0, POPESTIMATE/lag(POPESTIMATE, default = first(YEAR)) - 1)) %>%
  ungroup()

pop_age_gender <- pop_age_gender %>%
  select(-contains('_TOT'), -POPEST_FEM, -POPEST_MALE, -AGE16PLUS_MALE, -AGE16PLUS_FEM, -AGE18PLUS_FEM, -AGE18PLUS_MALE, -UNDER5_FEM, -UNDER5_MALE, -AGE1544_FEM, -AGE1544_MALE, -MEDIAN_AGE_FEM, -MEDIAN_AGE_MALE, -AGE65PLUS_FEM,-AGE65PLUS_MALE, -AGE513_FEM, -AGE513_MALE, -AGE4564_FEM, -AGE4564_MALE, -AGE2544_FEM, -AGE2544_MALE, -AGE1824_FEM, -AGE1824_MALE, -AGE1417_FEM, -AGE1417_MALE) %>%
  rename(AGE004_FEM = AGE04_FEM, AGE004_MALE = AGE04_MALE, AGE0509_MALE = AGE59_MALE, AGE0509_FEM = AGE59_FEM)
pop_age_gender <- pop_age_gender %>%
  pivot_longer(cols = colnames(pop_age_gender[,5:40]), names_to = 'DEMO', values_to = 'POP') %>%
  mutate(PERCENTAGE = POP/POPESTIMATE)
pop_age_gender <- pop_age_gender %>%
  mutate(GENDER = as.factor(ifelse(grepl('MALE', pop_age_gender$DEMO),'MALE','FEMALE')),
         DEMO = gsub('_FEM','', DEMO),
         DEMO = gsub('_MALE','', DEMO),
         DEMO = case_when(DEMO == 'AGE004' ~ '0-04',
                          DEMO == 'AGE0509' ~ '05-09',
                          DEMO == 'AGE1014' ~ '10-14',
                          DEMO == 'AGE1519' ~ '15-19',
                          DEMO == 'AGE2024' ~ '20-24',
                          DEMO == 'AGE2529' ~ '25-29',
                          DEMO == 'AGE3034' ~ '30-34',
                          DEMO == 'AGE3539' ~ '35-39',
                          DEMO == 'AGE4044' ~ '40-44',
                          DEMO == 'AGE4549' ~ '45-49',
                          DEMO == 'AGE5054' ~ '50-54',
                          DEMO == 'AGE5559' ~ '55-59',
                          DEMO == 'AGE6064' ~ '60-64',
                          DEMO == 'AGE6569' ~ '65-69',
                          DEMO == 'AGE7074' ~ '70-74',
                          DEMO == 'AGE7579' ~ '75-79',
                          DEMO == 'AGE8084' ~ '80-84',
                          DEMO == 'AGE85PLUS' ~ '85 and Over'))

# Making ethnicity data frame
ethpop <- rbind(read_csv("cc-est2019-alldata-37.csv", show_col_types = F),
                   read_csv("cc-est2019-alldata-45.csv", show_col_types = F)) %>%
  mutate(CTYNAME = gsub(' County', '', CTYNAME),
         YEAR = as.integer(YEAR + 2007),
         WHITE = NHWA_MALE + NHWA_FEMALE,
         BLACK = NHBA_MALE + NHBA_FEMALE,
         HISPANIC = HWA_MALE + HWA_FEMALE + HBA_MALE + HBA_FEMALE + HIA_MALE + HIA_FEMALE + HAA_MALE + HAA_FEMALE + HNA_MALE + HNA_FEMALE + HIA_MALE + HIA_FEMALE,
         ASIAN = NHAA_MALE + NHAA_FEMALE,
         ISLANDER = NHNA_MALE + NHNA_FEMALE,
         NATIVE = NHIA_MALE + NHIA_FEMALE,
         MULTIRACIAL = TOM_MALE + TOM_FEMALE - HTOM_MALE - HTOM_FEMALE
         ) %>%
  filter(CTYNAME %in% counties, YEAR >= 3, AGEGRP == 0,
         !(STNAME == 'South Carolina' & CTYNAME == 'Union')) %>%
  select(STNAME, CTYNAME, YEAR, TOT_POP, WHITE, BLACK, HISPANIC, ASIAN, ISLANDER, NATIVE, MULTIRACIAL) %>%
  distinct()
ethpop <- ethpop %>%
  pivot_longer(cols = colnames(ethpop[,5:11]), names_to = 'ETHNICITY', values_to = 'POP')

# Making place of birth data frame
birthplace <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Indicator == 'Place of Birth',
         County %in% counties,
         !(Measure %in% foreign_detail)) %>%
  distinct()
birthplace <- birthplace %>% inner_join((birthplace %>% group_by(County, Year) %>% summarise(Total = sum(Numerator_value))), by = c('County', 'Year'))

# Making the unemployment data frame
unemployment <- rbind(read_excel('ur_anson.xlsx', trim_ws = T) %>% mutate(County = 'Anson', Period = gsub('M', '', Period)),
                      read_excel('ur_cabarrus.xlsx', trim_ws = T) %>% mutate(County = 'Cabarrus', Period = gsub('M', '', Period)),
                      read_excel('ur_catawba.xlsx', trim_ws = T) %>% mutate(County = 'Catawba', Period = gsub('M', '', Period)),
                      read_excel('ur_chester.xlsx', trim_ws=T, skip=11)[1:266,] %>% rename(Value = 'Observation Value') %>% mutate(County = 'Chester', Period = gsub('M','',Period)) %>% select(-Label),
                      read_excel('ur_cleveland.xlsx', trim_ws = T) %>% mutate(County = 'Cleveland', Period = gsub('M', '', Period)),
                      read_excel('ur_gaston.xlsx', trim_ws = T) %>% mutate(County = 'Gaston', Period = gsub('M', '', Period)),
                      read_excel('ur_iredell.xlsx', trim_ws = T) %>% mutate(County = 'Iredell', Period = gsub('M', '', Period)),
                      read_excel('ur_lancaster.xlsx', trim_ws = T) %>% mutate(County = 'Lancaster', Period = gsub('M', '', Period)),
                      read_excel('ur_lincoln.xlsx', trim_ws = T) %>% mutate(County = 'Lincoln', Period = gsub('M', '', Period)),
                      read_excel('ur_mecklenburg.xlsx', trim_ws = T) %>% mutate(County = 'Mecklenburg', Period = gsub('M', '', Period)),
                      read_excel('ur_rowan.xlsx', trim_ws = T) %>% mutate(County = 'Rowan', Period = gsub('M', '', Period)),
                      read_excel('ur_stanly.xlsx', trim_ws=T, skip=11)[1:266,] %>% rename(Value = 'Observation Value') %>% mutate(County = 'Stanly', Period = gsub('M','',Period)) %>% select(-Label),
                      read_excel('ur_union.xlsx', trim_ws = T) %>% mutate(County = 'Union', Period = gsub('M', '', Period)),
                      read_excel('ur_york.xlsx', trim_ws = T) %>% mutate(County = 'York', Period = gsub('M', '', Period))) %>%
  mutate(Year = as.integer(Year),
         Period = as.integer(Period),
         Date = as.Date(paste(Year,'-',Period, '-01', sep = '')),
         Value = Value/100) %>%
  rename(Month = Period,
         Unemployment = Value)

# Make income data frame
income <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Indicator == 'Income & Earnings',
         County %in% counties,
         Measure != 'Household Income: Total') %>%
  distinct()
income <- income %>% inner_join((income %>% group_by(County, Year) %>% summarise(Total = sum(Numerator_value))), by = c('County', 'Year'))

# Make education attainment data frame
education <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Indicator == 'Educational Attainment',
         County %in% counties,
         Measure %in% attainment_lvl) %>%
  distinct()
education <- education %>% inner_join((education %>% group_by(County, Year) %>% summarise(Total = sum(Numerator_value))), by = c('County', 'Year')) %>%
  mutate(
    Order = as.factor(case_when(
      Measure == 'Highest Degree: Less than a High School Diploma' ~ 1,
      Measure == 'Highest Degree: High School Diploma' ~ 2,
      Measure == 'Highest Degree: Some College, No Degree' ~ 3,
      Measure == "Highest Degree: Associate's Degree" ~ 4,
      Measure == "Highest Degree: Bachelor's Degree" ~ 5,
      Measure == "Highest Degree: Graduate or Professional Degree" ~ 6)),
    Measure = gsub("Highest Degree: ","",Measure))
# Make health care coverage data frame
coverage <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Indicator == 'Health Care Coverage',
         County %in% counties)
# Make housing age data frame
housing <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County),
         diff = Year-Numerator_value) %>%
  filter(Indicator == 'Housing Stock',
         County %in% counties)
# Make poverty figures data frame
poverty <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Measure == 'Individuals in Poverty',
         Theme == 'Social Well-Being',
         County %in% counties)
# Make transportation means data frame
transportation <- read.csv('Values.csv') %>%
  mutate(County = gsub(' County, North Carolina', '', County),
         County = gsub(' County, South Carolina', '', County)) %>%
  filter(Theme == 'Transportation',
         Measure != 'Commuting Means Total',
         County %in% counties)
```


# Demographics
## Population
```{r}
plot_ly(countypop %>% filter(YEAR == 2019), x = ~POPESTIMATE, y = ~CTYNAME, type = 'bar', color = ~CTYNAME, colors = color_a, orientation = 'h')

plot_ly(countypop, x=~YEAR, y=~CHANGE, color=~CTYNAME, type='scatter', mode='lines', colors=color_a)
```

## Age & Gender
```{r, fig.height = 6, fig.width= 10, warning=FALSE}
plot_ly(pop_age_gender %>% filter(YEAR == 2017, CTYNAME == 'Charlotte Region'),
        y=~DEMO, x=~PERCENTAGE,
        type='bar', color=~GENDER)
```

## Race & Ethnicity
```{r}
plot_ly(ethpop %>% filter(YEAR == 2019),
        y=~CTYNAME, x=~POP/TOT_POP, color=~ETHNICITY,
        type='bar') %>%
  layout(barmode = 'stack')
```

## Place of Birth
```{r}
plot_ly(birthplace %>% filter(Year == 2019),
        y=~County, x=~Numerator_value/Total, color=~Measure,
        type='bar') %>%
  layout(barmode = 'stack')
```



# Economy
## Unemployment
```{r}
plot_ly(unemployment, x=~Date, y=~Unemployment, color=~County, colors=color_a, type='scatter', mode='lines')

plot_ly(unemployment %>% filter(Year==2015, Month==6), x=~Unemployment, y=~County, color=~County, colors=color_a, type='bar')
```

################   6/21 Up to this point has been done in the shiny app  ######################


## Income
```{r}
#### DFs from Values.csv are missing Anson, Chester, and Stanly Counties
plot_ly(income %>% filter(Year == 2014), y=~County, x=~(Numerator_value/Total), color=~Measure, type='bar') %>%
  layout(barmode='stack')
```

# Education
## Educational Attainment
```{r}
plot_ly(education %>% group_by(Year, Measure, County) %>%
  summarise(Numerator = sum(Numerator_value),
            Denominator = mean(Total)) %>% filter(Year == 2018, Measure == 'High School Diploma'), y=~County, color=~County, colors=color_na, x=~(Numerator/Denominator), type='bar')
```

# Health
## Health Care Coverage
```{r, warning=FALSE}
plot_ly(coverage %>%
         group_by(Year, County, Measure) %>%
         summarise(added=sum(Numerator_value)) %>%
         filter(Year==2016, !(Measure %in% c("Health Insurance Total"))),
       y=~County, x=~added, type='bar', color=~Measure)

plot_ly(coverage %>%
         group_by(Year, County, Measure) %>%
         summarise(added=sum(Numerator_value)) %>%
         filter(Year==2016, Measure=='Children without Health Insurance'),
       y=~County, x=~added, type='bar', color=~County, colors=color_na)
coverage %>% distinct(Measure)
```
# Housing
## Housing Age
```{r}
housing %>% filter(Year == 2017) %>%
  ggplot(aes(x= County, y = Year-Numerator_value, fill = County)) +
  geom_col() +
  coord_flip()
# axis is giving double the true value
plot_ly(housing %>% filter(Year == 2014), y=~County, x=~diff,
        color=~County, colors=color_na, type='bar')
```
# Social Well-Being
## Poverty
```{r}
plot_ly(poverty %>% filter(Year==2014),
        y=~County, x=~Numerator_value/Denominator_value,
        color=~County, colors=color_na, type='bar') %>%
      layout(xaxis=list(range=list(0,0.2)))
```
# Transportation
##Commuting Modes
```{r, eval=FALSE}
# Way too many missing values to create a substantive visualization.
transportation %>% filter(Year == 2014) %>%
  ggplot(aes(x = County, y = Numerator_value, fill = Measure, position = Measure)) +
  geom_col(position = 'dodge') +
  scale_y_continuous(labels = comma) +
  coord_flip()
```




